The data consists of all trees (h >= 2 m) present in a 1 ha plot.
Trees were extracted from a 2018 scan (n = 1018) and a 2019 scan (n =
920) following a major fire event.
Apart from the removal of one extreme outlier, the natural variation in
tree volume was preserved when establishing the model.
library(devtools)
library(plyr)
library(scales)
library(tidyr)
library(nlme)
library(dplyr)
library(ggplot2)
library(AICcmodavg)
library(visreg)
library(ggforce)
library(bbmle)
library(lmtest)
library(knitr)
library(performance)
library(caret)
# Root Mean Square Error (RMSE) of model predictions, in the units of Obs.
RMSE<-function(Obs,Pred){
  resid <- Obs - Pred
  sqrt(mean(resid * resid))
}
# Relative error (%) of each prediction; positive values = over-prediction.
# Undefined (Inf/NaN) if Obs contains zeros.
relative_err<-function(Obs,Pred){
  100 * (Pred - Obs) / Obs
}
# remove outlier/ NAs
# drop the single extreme-volume tree flagged as an outlier (by scan ID)
df <- df[!(df$ID=="2018SOR_449.txt"),]
# drop trees with no crown-area estimate
df <- df %>% drop_na(CrownArea_rTLS)
# rename variables (short aliases used throughout the analysis)
df$H <- df$Height_rTLS
df$CA <- df$CrownArea_rTLS
df$V <- df$voxel_Volume_VoxR_04
# create compound variable: height x crown area (a volume-like predictor)
df$H_CA <- df$H * df$CA
# log H_CA and centre at its mean for use with year as interaction term
# (centring makes the year main effect interpretable at the mean log(H_CA))
df$log_H_CA <- log(df$H_CA)
df$H_CA_log_centred <- scale(df$log_H_CA, scale=FALSE)
# scale() returns a 1-column matrix; coerce back to a plain numeric vector
df$H_CA_log_centred <- as.numeric(df$H_CA_log_centred)
# assign trees to log scale bins for data binning/ thinning
# seq(..., len=9) defines 8 equal-width bins on the log(V) scale; the +/-0.01
# padding keeps the extreme values strictly inside the outermost breaks.
# hist(..., plot=F)$breaks simply echoes the supplied break vector back
# (the histogram itself is never drawn).
df$V_class <- cut(log(df$V),
breaks = hist(log(df$V),
breaks = seq(min(log(df$V)-0.01),
max(log(df$V)+0.01),
len=9),
plot=F)$breaks)
# convert the factor to its integer bin index (1..8)
df$V_class <- as.numeric(df$V_class)
# Data binning: mean of full data bins to fit final model (include year)
# One row per volume class and year: bin means of V, H, CA, H_CA and the
# centred log(H_CA), plus the number of trees contributing to each bin.
df18 <- filter(df, year == 2018)
df19 <- filter(df, year == 2019)
df_bin_18 <- ddply(df18, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
H_CA_log_centred = mean(H_CA_log_centred),
year = mean(year))
df_bin_19 <- ddply(df19, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
H_CA_log_centred = mean(H_CA_log_centred),
year = mean(year))
# stack the two years into one binned dataset (up to 8 bins per year)
df_bin_y <- rbind(df_bin_18, df_bin_19)
# Data thinning: equal random sub-sample of full data bins to fit final model
# (64 trees drawn without replacement from each of the 8 volume classes).
# lapply + one rbind avoids growing a data frame inside a loop and avoids
# shadowing base::sample with a local variable; classes are visited in the
# same 1..8 order as before, so the RNG draws are unchanged.
df_thin <- do.call(rbind, lapply(seq_len(8), function(bin) {
  sample_n(filter(df, V_class == bin), 64, replace = FALSE)
}))
# set reps for Relative error plots
reps <- 100
The relationship between tree height H and total tree volume V was
heteroscedastic. With a log transformation applied to both variables the
relationship is more linear but with a large spread.
The relationship between crown area CA and total tree volume V was also
improved by applying a log transformation to both variables but with
increased spread in the smaller values.
Creating a compound variable H_CA and applying a log transformation to
both variables produced the best linear relationship with total tree
volume V with reduced spread in the lower values.
# Exploratory scatterplots: V against each candidate predictor on the raw
# scale and on the log-log scale (the log-log H_CA panel is the most linear).
ggplot(df, aes(x = H, y = V)) + geom_point()
ggplot(df, aes(x = log(H), y = log(V))) + geom_point()
ggplot(df, aes(x = CA, y = V)) + geom_point()
ggplot(df, aes(x = log(CA), y = log(V))) + geom_point()
ggplot(df, aes(x = H_CA, y = V)) + geom_point()
ggplot(df, aes(x = log(H_CA), y = log(V))) + geom_point()
Each model is run 100 times; each repetition uses a newly drawn random
split of the data into calibration and validation sets.
The relative error of each rep is plotted.
Relative error plots are based on Jucker et al. (2017).
# ---- Monte-Carlo cross-validation: models fitted to the RAW data ----
# Each of the five experiments below repeats `reps` times: draw a stratified
# validation set (24 trees per volume class), fit the model to all remaining
# trees, and overlay a smoothed relative-error trace on a shared canvas.
# Per-rep intercept, slope and validation RMSE for M1: log(V) ~ log(H_CA)
Raw_data_model<-data.frame(matrix(nrow=reps, ncol=3))
colnames(Raw_data_model)[1:3]<-c("intercept","beta_log_HCA","RMSE")
# empty log-x canvas for the relative-error spline traces (one per rep)
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
# stratified draw: 24 trees without replacement from each of the 8 V classes
# NOTE(review): the local `sample` shadows base::sample within the loop
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
### Fit models and generate predictions
M1<-lm(log(V)~log(H_CA),data=Data_model)
# back-transform predictions from the log scale (no bias correction applied)
Validation_data$V_pred_M1<-exp(predict(M1,Validation_data))
Validation_data$Error_M1<-relative_err(Validation_data$V,Validation_data$V_pred_M1)
lines(smooth.spline(Validation_data$Error_M1 ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25), lwd=0.5)
# Model fit to raw data
Raw_data_model[i,"intercept"]<-coef(M1)[1]
Raw_data_model[i,"beta_log_HCA"]<-coef(M1)[2]
Raw_data_model[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1)
}
# --- M1_y: as M1 plus an additive year (scan) effect ---
Raw_data_model_y<-data.frame(matrix(nrow=reps, ncol=4))
colnames(Raw_data_model_y)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
### Fit models and generate predictions
M1_y<-lm(log(V)~log(H_CA) + factor(year),data=Data_model)
Validation_data$V_pred_M1_y<-exp(predict(M1_y,Validation_data))
Validation_data$Error_M1_y<-relative_err(Validation_data$V,Validation_data$V_pred_M1_y)
lines(smooth.spline(Validation_data$Error_M1_y ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25),lwd=0.5)
# Model fit to raw data
Raw_data_model_y[i,"intercept"]<-coef(M1_y)[1]
Raw_data_model_y[i,"beta_log_HCA"]<-coef(M1_y)[2]
Raw_data_model_y[i,"beta_year"]<-coef(M1_y)[3]
Raw_data_model_y[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1_y)
}
# --- M1y: centred log(H_CA) interacting with year (slope varies by scan) ---
Raw_data_modely<-data.frame(matrix(nrow=reps, ncol=4))
colnames(Raw_data_modely)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
### Fit models and generate predictions
M1y<-lm(log(V)~H_CA_log_centred * factor(year),data=Data_model)
Validation_data$V_pred_M1y<-exp(predict(M1y,Validation_data))
Validation_data$Error_M1y<-relative_err(Validation_data$V,Validation_data$V_pred_M1y)
lines(smooth.spline(Validation_data$Error_M1y ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25),lwd=0.5)
# Model fit to raw data
Raw_data_modely[i,"intercept"]<-coef(M1y)[1]
Raw_data_modely[i,"beta_log_HCA"]<-coef(M1y)[2]
Raw_data_modely[i,"beta_year"]<-coef(M1y)[3]
Raw_data_modely[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1y)
}
# --- M1a: separate additive terms log(H) + log(CA) ---
# NOTE(review): the "beta_log_HCA" column below stores coef(M1a)[2], which is
# the log(H) coefficient, not a log(H_CA) coefficient — label is misleading
Raw_data_modela<-data.frame(matrix(nrow=reps, ncol=3))
colnames(Raw_data_modela)[1:3]<-c("intercept","beta_log_HCA","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
### Fit models and generate predictions
M1a <- lm(log(V)~log(H)+log(CA),data=Data_model)
Validation_data$V_pred_M1a<-exp(predict(M1a,Validation_data))
Validation_data$Error_M1a<-relative_err(Validation_data$V,Validation_data$V_pred_M1a)
lines(smooth.spline(Validation_data$Error_M1a ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25),lwd=0.5)
# Model fit to raw data
Raw_data_modela[i,"intercept"]<-coef(M1a)[1]
Raw_data_modela[i,"beta_log_HCA"]<-coef(M1a)[2]
Raw_data_modela[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1a)
}
# --- M1b: log(H) * log(CA), i.e. main effects plus their interaction ---
Raw_data_modelb<-data.frame(matrix(nrow=reps, ncol=3))
colnames(Raw_data_modelb)[1:3]<-c("intercept","beta_log_HCA","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
### Fit models and generate predictions
M1b <- lm(log(V)~log(H)*log(CA),data=Data_model)
Validation_data$V_pred_M1b<-exp(predict(M1b,Validation_data))
Validation_data$Error_M1b<-relative_err(Validation_data$V,Validation_data$V_pred_M1b)
lines(smooth.spline(Validation_data$Error_M1b ~ Validation_data$V,nknots=5),col=alpha("chartreuse3",0.25),lwd=0.5)
# Model fit to raw data
Raw_data_modelb[i,"intercept"]<-coef(M1b)[1]
Raw_data_modelb[i,"beta_log_HCA"]<-coef(M1b)[2]
Raw_data_modelb[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M1b)
}
# ---- Monte-Carlo cross-validation: models fitted to BINNED data ----
# Same protocol as the raw-data experiments, except that within each rep the
# training trees are collapsed to per-year volume-class means (ddply) and the
# model is fitted to those bin means; validation is still on individual trees.
Binned_data_model<-data.frame(matrix(nrow=reps, ncol=3))
colnames(Binned_data_model)[1:3]<-c("intercept","beta_log_HCA","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data binning to include year
Data_model18 <- filter(Data_model, year == 2018)
Data_model19 <- filter(Data_model, year == 2019)
Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
year = mean(year))
Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
year = mean(year))
Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
# Train model
M2<-lm(log(V)~log(H_CA),data=Data_bin_y)
Validation_data$V_pred_M2<-exp(predict(M2,Validation_data))
Validation_data$Error_M2<-relative_err(Validation_data$V,Validation_data$V_pred_M2)
lines(smooth.spline(Validation_data$Error_M2 ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25),lwd=0.5)
# Model fit to binned data
Binned_data_model[i,"intercept"]<-coef(M2)[1]
Binned_data_model[i,"beta_log_HCA"]<-coef(M2)[2]
Binned_data_model[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2)
}
# --- M2_y: binned data, additive year effect ---
Binned_data_model_y<-data.frame(matrix(nrow=reps, ncol=4))
colnames(Binned_data_model_y)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data binning to include year
Data_model18 <- filter(Data_model, year == 2018)
Data_model19 <- filter(Data_model, year == 2019)
Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
year = mean(year))
Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
year = mean(year))
Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
# Train model
M2_y<-lm(log(V)~log(H_CA) + factor(year),data=Data_bin_y)
Validation_data$V_pred_M2_y<-exp(predict(M2_y,Validation_data))
Validation_data$Error_M2_y<-relative_err(Validation_data$V,Validation_data$V_pred_M2_y)
lines(smooth.spline(Validation_data$Error_M2_y ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25),lwd=0.5)
# Model fit to binned data
Binned_data_model_y[i,"intercept"]<-coef(M2_y)[1]
Binned_data_model_y[i,"beta_log_HCA"]<-coef(M2_y)[2]
Binned_data_model_y[i,"beta_year"]<-coef(M2_y)[3]
Binned_data_model_y[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2_y)
}
# --- M2y: binned data, centred log(H_CA) x year interaction.
# Note the bins here average H_CA_log_centred (not H_CA) to match the model ---
Binned_data_modely<-data.frame(matrix(nrow=reps, ncol=4))
colnames(Binned_data_modely)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data binning to include year
Data_model18 <- filter(Data_model, year == 2018)
Data_model19 <- filter(Data_model, year == 2019)
Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA_log_centred = mean(H_CA_log_centred),
year = mean(year))
Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA_log_centred = mean(H_CA_log_centred),
year = mean(year))
Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
# Train model
M2y<-lm(log(V)~H_CA_log_centred * factor(year),data=Data_bin_y)
Validation_data$V_pred_M2y<-exp(predict(M2y,Validation_data))
Validation_data$Error_M2y<-relative_err(Validation_data$V,Validation_data$V_pred_M2y)
lines(smooth.spline(Validation_data$Error_M2y ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25),lwd=0.5)
# Model fit to binned data
Binned_data_modely[i,"intercept"]<-coef(M2y)[1]
Binned_data_modely[i,"beta_log_HCA"]<-coef(M2y)[2]
Binned_data_modely[i,"beta_year"]<-coef(M2y)[3]
Binned_data_modely[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2y)
}
# --- M2a: binned data, additive log(H) + log(CA).
# NOTE(review): "beta_log_HCA" stores coef(M2a)[2] = the log(H) coefficient ---
Binned_data_modela<-data.frame(matrix(nrow=reps, ncol=3))
colnames(Binned_data_modela)[1:3]<-c("intercept","beta_log_HCA","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data binning to include year
Data_model18 <- filter(Data_model, year == 2018)
Data_model19 <- filter(Data_model, year == 2019)
Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
year = mean(year))
Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
year = mean(year))
Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
# Train model
M2a <- lm(log(V)~log(H)+log(CA),data=Data_bin_y)
Validation_data$V_pred_M2a<-exp(predict(M2a,Validation_data))
Validation_data$Error_M2a<-relative_err(Validation_data$V,Validation_data$V_pred_M2a)
lines(smooth.spline(Validation_data$Error_M2a ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25), lwd=0.5)
# Model fit to binned data
Binned_data_modela[i,"intercept"]<-coef(M2a)[1]
Binned_data_modela[i,"beta_log_HCA"]<-coef(M2a)[2]
Binned_data_modela[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2a)
}
# --- M2b: binned data, log(H) * log(CA) with interaction ---
Binned_data_modelb<-data.frame(matrix(nrow=reps, ncol=3))
colnames(Binned_data_modelb)[1:3]<-c("intercept","beta_log_HCA","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data binning to include year
Data_model18 <- filter(Data_model, year == 2018)
Data_model19 <- filter(Data_model, year == 2019)
Data_bin_18 <- ddply(Data_model18, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
year = mean(year))
Data_bin_19 <- ddply(Data_model19, .(V_class),summarise,
N_trees =length(V),
V = mean(V),
H = mean(H),
CA = mean(CA),
H_CA = mean(H_CA),
year = mean(year))
Data_bin_y <- rbind(Data_bin_18, Data_bin_19)
# Train model
M2b <- lm(log(V)~log(H)*log(CA),data=Data_bin_y)
Validation_data$V_pred_M2b<-exp(predict(M2b,Validation_data))
Validation_data$Error_M2b<-relative_err(Validation_data$V,Validation_data$V_pred_M2b)
lines(smooth.spline(Validation_data$Error_M2b ~ Validation_data$V,nknots=5),col=alpha("cornflowerblue",0.25), lwd=0.5)
# Model fit to binned data
Binned_data_modelb[i,"intercept"]<-coef(M2b)[1]
Binned_data_modelb[i,"beta_log_HCA"]<-coef(M2b)[2]
Binned_data_modelb[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M2b)
}
# ---- Monte-Carlo cross-validation: models fitted to THINNED data ----
# Same protocol again, except that within each rep the training data are
# thinned to an equal random sub-sample of 40 trees per volume class before
# fitting; validation remains on individual held-out trees.
Thinned_data_model<-data.frame(matrix(nrow=reps, ncol=3))
colnames(Thinned_data_model)[1:3]<-c("intercept","beta_log_HCA","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data thinning: equal random sub-sample of data bins
Data_thin <- data.frame()
for (l in 1:8) {
tmp <- filter(Data_model, V_class == l)
sample <- sample_n(tmp, 40, replace = FALSE)
Data_thin <- rbind(Data_thin,sample)
}
### Fit models and generate predictions
M3<-lm(log(V)~log(H_CA),data=Data_thin)
Validation_data$V_pred_M3<-exp(predict(M3,Validation_data))
Validation_data$Error_M3<-relative_err(Validation_data$V,Validation_data$V_pred_M3)
lines(smooth.spline(Validation_data$Error_M3 ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
# Model fit to raw data
Thinned_data_model[i,"intercept"]<-coef(M3)[1]
Thinned_data_model[i,"beta_log_HCA"]<-coef(M3)[2]
Thinned_data_model[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3)
}
# --- M3_y: thinned data, additive year effect ---
Thinned_data_model_y<-data.frame(matrix(nrow=reps, ncol=4))
colnames(Thinned_data_model_y)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data thinning: equal random sub-sample of data bins
Data_thin <- data.frame()
for (l in 1:8) {
tmp <- filter(Data_model, V_class == l)
sample <- sample_n(tmp, 40, replace = FALSE)
Data_thin <- rbind(Data_thin,sample)
}
### Fit models and generate predictions
M3_y<-lm(log(V)~log(H_CA) + factor(year),data=Data_thin)
Validation_data$V_pred_M3_y<-exp(predict(M3_y,Validation_data))
Validation_data$Error_M3_y<-relative_err(Validation_data$V,Validation_data$V_pred_M3_y)
lines(smooth.spline(Validation_data$Error_M3_y ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
# Model fit to raw data
Thinned_data_model_y[i,"intercept"]<-coef(M3_y)[1]
Thinned_data_model_y[i,"beta_log_HCA"]<-coef(M3_y)[2]
Thinned_data_model_y[i,"beta_year"]<-coef(M3_y)[3]
Thinned_data_model_y[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3_y)
}
# --- M3y: thinned data, centred log(H_CA) x year interaction ---
Thinned_data_modely<-data.frame(matrix(nrow=reps, ncol=4))
colnames(Thinned_data_modely)[1:4]<-c("intercept","beta_log_HCA","beta_year","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data thinning: equal random sub-sample of data bins
Data_thin <- data.frame()
for (l in 1:8) {
tmp <- filter(Data_model, V_class == l)
sample <- sample_n(tmp, 40, replace = FALSE)
Data_thin <- rbind(Data_thin,sample)
}
### Fit models and generate predictions
M3y<-lm(log(V)~H_CA_log_centred * factor(year),data=Data_thin)
Validation_data$V_pred_M3y<-exp(predict(M3y,Validation_data))
Validation_data$Error_M3y<-relative_err(Validation_data$V,Validation_data$V_pred_M3y)
lines(smooth.spline(Validation_data$Error_M3y ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
# Model fit to raw data
Thinned_data_modely[i,"intercept"]<-coef(M3y)[1]
Thinned_data_modely[i,"beta_log_HCA"]<-coef(M3y)[2]
Thinned_data_modely[i,"beta_year"]<-coef(M3y)[3]
Thinned_data_modely[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3y)
}
# --- M3a: thinned data, additive log(H) + log(CA).
# NOTE(review): "beta_log_HCA" stores coef(M3a)[2] = the log(H) coefficient ---
Thinned_data_modela<-data.frame(matrix(nrow=reps, ncol=3))
colnames(Thinned_data_modela)[1:3]<-c("intercept","beta_log_HCA","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data thinning: equal random sub-sample of data bins
Data_thin <- data.frame()
for (l in 1:8) {
tmp <- filter(Data_model, V_class == l)
sample <- sample_n(tmp, 40, replace = FALSE)
Data_thin <- rbind(Data_thin,sample)
}
### Fit models and generate predictions
M3a <- lm(log(V)~log(H)+log(CA),data=Data_thin)
Validation_data$V_pred_M3a<-exp(predict(M3a,Validation_data))
Validation_data$Error_M3a<-relative_err(Validation_data$V,Validation_data$V_pred_M3a)
lines(smooth.spline(Validation_data$Error_M3a ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
# Model fit to raw data
Thinned_data_modela[i,"intercept"]<-coef(M3a)[1]
Thinned_data_modela[i,"beta_log_HCA"]<-coef(M3a)[2]
Thinned_data_modela[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3a)
}
# --- M3b: thinned data, log(H) * log(CA) with interaction ---
Thinned_data_modelb<-data.frame(matrix(nrow=reps, ncol=3))
colnames(Thinned_data_modelb)[1:3]<-c("intercept","beta_log_HCA","RMSE")
plot(1,1,log="x", pch=16, col="white",bty="l",xlim=c(1,25),ylim=c(-100,100),
xlab="Observed volume (m3)",ylab="Error (%)",las=1,cex.axis=0.8,cex.lab=0.9)
grid(equilogs=FALSE,lty=2,col="grey80",lwd=0.5)
abline(h=0,col="grey20",lty=2)
for (i in 1:reps){
# Set up validation data
Validation_data <- data.frame()
for (j in 1:8) {
tmp <- filter(df, V_class == j)
sample <- sample_n(tmp, 24, replace = FALSE)
Validation_data <- rbind(Validation_data,sample)
}
# use rest to train model
Model_trees<-data.frame(ID=setdiff(df$ID,Validation_data$ID))
Data_model<-merge(df,Model_trees) # data for model fitting (90% of trees)
# Data thinning: equal random sub-sample of data bins
Data_thin <- data.frame()
for (l in 1:8) {
tmp <- filter(Data_model, V_class == l)
sample <- sample_n(tmp, 40, replace = FALSE)
Data_thin <- rbind(Data_thin,sample)
}
### Fit models and generate predictions
M3b <- lm(log(V)~log(H)*log(CA),data=Data_thin)
Validation_data$V_pred_M3b<-exp(predict(M3b,Validation_data))
Validation_data$Error_M3b<-relative_err(Validation_data$V,Validation_data$V_pred_M3b)
lines(smooth.spline(Validation_data$Error_M3b ~ Validation_data$V,nknots=5),col=alpha("deeppink1",0.25),lwd=0.5)
# Model fit to raw data
Thinned_data_modelb[i,"intercept"]<-coef(M3b)[1]
Thinned_data_modelb[i,"beta_log_HCA"]<-coef(M3b)[2]
Thinned_data_modelb[i,"RMSE"]<-RMSE(Validation_data$V,Validation_data$V_pred_M3b)
}
Mean RMSE (m3) for the 100 reps.
# observed volume range (m3) across both scan years
min(df$V)
## [1] 0.00256
max(df$V)
## [1] 23.05142
# mean validation RMSE over the `reps` repetitions: raw-data models
RM1 <- mean(Raw_data_model$RMSE)
RM1_y <- mean(Raw_data_model_y$RMSE)
RM1y <- mean(Raw_data_modely$RMSE)
RM1a <- mean(Raw_data_modela$RMSE)
RM1b <- mean(Raw_data_modelb$RMSE)
RM1all <- c(RM1, RM1_y, RM1y, RM1a, RM1b)
# binned-data models
RM2 <- mean(Binned_data_model$RMSE)
RM2_y <- mean(Binned_data_model_y$RMSE)
RM2y <- mean(Binned_data_modely$RMSE)
RM2a <- mean(Binned_data_modela$RMSE)
RM2b <- mean(Binned_data_modelb$RMSE)
RM2all <- c(RM2, RM2_y, RM2y, RM2a, RM2b)
# thinned-data models
RM3 <- mean(Thinned_data_model$RMSE)
RM3_y <- mean(Thinned_data_model_y$RMSE)
RM3y <- mean(Thinned_data_modely$RMSE)
RM3a <- mean(Thinned_data_modela$RMSE)
RM3b <- mean(Thinned_data_modelb$RMSE)
RM3all <- c(RM3, RM3_y, RM3y, RM3a, RM3b)
# make data frame
# NOTE(review): `data` here holds the table's column labels and shadows
# base::data — consider a more descriptive name if this script is revised
data <- c("model", "raw data", "binned data", "thinned data")
model <- c("M_CA", "M_CA + year", "M_CA * year", "M + CA", "M * CA")
dfR <- data.frame(model, RM1all, RM2all, RM3all)
colnames(dfR) <- data
kable(dfR)
| model | raw data | binned data | thinned data |
|---|---|---|---|
| M_CA | 1.973366 | 1.645145 | 1.632187 |
| M_CA + year | 1.788147 | 1.545326 | 1.501676 |
| M_CA * year | 2.010889 | 1.917560 | 1.635667 |
| M + CA | 1.963079 | 1.646094 | 1.643541 |
| M * CA | 1.771526 | 1.756127 | 1.647579 |
# Final M1: power-law (log-log) model of volume vs H*CA, fitted to all trees
M1 <- lm(log(V) ~ log(H_CA), data = df)
summary(M1)
##
## Call:
## lm(formula = log(V) ~ log(H_CA), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.22628 -0.47538 0.02432 0.46293 2.17167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.937141 0.018406 -159.6 <2e-16 ***
## log(H_CA) 0.720053 0.005992 120.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.627 on 1903 degrees of freedom
## Multiple R-squared: 0.8836, Adjusted R-squared: 0.8835
## F-statistic: 1.444e+04 on 1 and 1903 DF, p-value: < 2.2e-16
# Final M2: same model fitted to the per-year bin means (df_bin_y)
M2 <- lm(log(V) ~ log(H_CA), data = df_bin_y)
summary(M2)
##
## Call:
## lm(formula = log(V) ~ log(H_CA), data = df_bin_y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37410 -0.28781 0.04137 0.20109 0.48828
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.30143 0.09159 -36.05 3.29e-15 ***
## log(H_CA) 0.80113 0.02304 34.77 5.43e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2878 on 14 degrees of freedom
## Multiple R-squared: 0.9885, Adjusted R-squared: 0.9877
## F-statistic: 1209 on 1 and 14 DF, p-value: 5.434e-15
# Final M3: same model fitted to the thinned sub-sample (df_thin)
M3 <- lm(log(V) ~ log(H_CA), data = df_thin)
summary(M3)
##
## Call:
## lm(formula = log(V) ~ log(H_CA), data = df_thin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.28449 -0.43784 0.06039 0.42141 2.08901
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.916855 0.032497 -89.76 <2e-16 ***
## log(H_CA) 0.744127 0.008336 89.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6259 on 510 degrees of freedom
## Multiple R-squared: 0.9398, Adjusted R-squared: 0.9397
## F-statistic: 7968 on 1 and 510 DF, p-value: < 2.2e-16
# Final M1_y: raw data with an additive year (scan) effect
M1_y<-lm(log(V)~log(H_CA) + factor(year),data=df)
summary(M1_y)
##
## Call:
## lm(formula = log(V) ~ log(H_CA) + factor(year), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.44224 -0.38583 0.01784 0.36707 2.40993
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.728729 0.021023 -129.80 <2e-16 ***
## log(H_CA) 0.724841 0.005586 129.76 <2e-16 ***
## factor(year)2019 -0.459075 0.026820 -17.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5838 on 1902 degrees of freedom
## Multiple R-squared: 0.8991, Adjusted R-squared: 0.899
## F-statistic: 8475 on 2 and 1902 DF, p-value: < 2.2e-16
# Final M2_y: binned data with an additive year effect
M2_y<-lm(log(V)~log(H_CA) + factor(year),data=df_bin_y)
summary(M2_y)
##
## Call:
## lm(formula = log(V) ~ log(H_CA) + factor(year), data = df_bin_y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43418 -0.15925 -0.01401 0.16980 0.40453
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.14420 0.09339 -33.667 4.94e-14 ***
## log(H_CA) 0.80474 0.01889 42.611 2.37e-15 ***
## factor(year)2019 -0.33226 0.11791 -2.818 0.0145 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2353 on 13 degrees of freedom
## Multiple R-squared: 0.9929, Adjusted R-squared: 0.9918
## F-statistic: 907.9 on 2 and 13 DF, p-value: 1.088e-14
# Final M3_y: thinned data with an additive year effect
M3_y<-lm(log(V)~log(H_CA) + factor(year),data=df_thin)
summary(M3_y)
##
## Call:
## lm(formula = log(V) ~ log(H_CA) + factor(year), data = df_thin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.44122 -0.35008 0.02093 0.36647 2.34024
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.769211 0.037072 -74.697 < 2e-16 ***
## log(H_CA) 0.749891 0.007984 93.920 < 2e-16 ***
## factor(year)2019 -0.390583 0.053903 -7.246 1.6e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5965 on 509 degrees of freedom
## Multiple R-squared: 0.9455, Adjusted R-squared: 0.9453
## F-statistic: 4413 on 2 and 509 DF, p-value: < 2.2e-16
# Raw data: mean-centred log(H*CA) interacted with year, so both slope and
# intercept may differ between the 2018 and 2019 scans.
M1y <- lm(log(V) ~ H_CA_log_centred * factor(year), data = df)
summary(M1y)
##
## Call:
## lm(formula = log(V) ~ H_CA_log_centred * factor(year), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.43451 -0.37455 -0.00658 0.35573 2.43725
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.332759 0.018202 -73.221 < 2e-16 ***
## H_CA_log_centred 0.758280 0.007108 106.684 < 2e-16 ***
## factor(year)2019 -0.456530 0.026447 -17.262 < 2e-16 ***
## H_CA_log_centred:factor(year)2019 -0.083694 0.011245 -7.443 1.48e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5756 on 1901 degrees of freedom
## Multiple R-squared: 0.902, Adjusted R-squared: 0.9018
## F-statistic: 5830 on 3 and 1901 DF, p-value: < 2.2e-16
# Binned data: centred log(H*CA) with a year interaction (slope and intercept
# free to differ between scan years).
M2y <- lm(log(V) ~ H_CA_log_centred * factor(year), data = df_bin_y)
summary(M2y)
##
## Call:
## lm(formula = log(V) ~ H_CA_log_centred * factor(year), data = df_bin_y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.23417 -0.15098 -0.02025 0.10949 0.35513
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.32647 0.07008 -18.928 2.65e-10 ***
## H_CA_log_centred 0.80223 0.02196 36.532 1.13e-13 ***
## factor(year)2019 -0.28654 0.09944 -2.881 0.0138 *
## H_CA_log_centred:factor(year)2019 -0.04818 0.03073 -1.568 0.1429
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1982 on 12 degrees of freedom
## Multiple R-squared: 0.9953, Adjusted R-squared: 0.9942
## F-statistic: 854.9 on 3 and 12 DF, p-value: 2.986e-14
# Thinned data: centred log(H*CA) with a year interaction.
M3y <- lm(log(V) ~ H_CA_log_centred * factor(year), data = df_thin)
summary(M3y)
##
## Call:
## lm(formula = log(V) ~ H_CA_log_centred * factor(year), data = df_thin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.43874 -0.33668 0.00062 0.35900 2.32077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.327052 0.034161 -38.847 < 2e-16 ***
## H_CA_log_centred 0.762556 0.009722 78.437 < 2e-16 ***
## factor(year)2019 -0.379067 0.053927 -7.029 6.72e-12 ***
## H_CA_log_centred:factor(year)2019 -0.038270 0.016900 -2.265 0.024 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5941 on 508 degrees of freedom
## Multiple R-squared: 0.946, Adjusted R-squared: 0.9457
## F-statistic: 2967 on 3 and 508 DF, p-value: < 2.2e-16
# Raw data: height and crown area entered as separate additive predictors.
M1a <- lm(log(V) ~ log(H) + log(CA), data = df)
summary(M1a)
##
## Call:
## lm(formula = log(V) ~ log(H) + log(CA), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.24551 -0.47381 0.02379 0.46565 2.14827
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.85558 0.05541 -51.54 <2e-16 ***
## log(H) 0.66939 0.03301 20.28 <2e-16 ***
## log(CA) 0.73622 0.01197 61.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6267 on 1902 degrees of freedom
## Multiple R-squared: 0.8837, Adjusted R-squared: 0.8836
## F-statistic: 7228 on 2 and 1902 DF, p-value: < 2.2e-16
# Binned data: height and crown area as separate additive predictors.
M2a <- lm(log(V) ~ log(H) + log(CA), data = df_bin_y)
summary(M2a)
##
## Call:
## lm(formula = log(V) ~ log(H) + log(CA), data = df_bin_y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36830 -0.23349 -0.01498 0.21028 0.50700
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.8953 0.9275 -3.122 0.008102 **
## log(H) 0.5693 0.5660 1.006 0.332853
## log(CA) 0.8812 0.1802 4.889 0.000296 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2954 on 13 degrees of freedom
## Multiple R-squared: 0.9888, Adjusted R-squared: 0.9871
## F-statistic: 573.6 on 2 and 13 DF, p-value: 2.094e-13
# Thinned data: height and crown area as separate additive predictors.
M3a <- lm(log(V) ~ log(H) + log(CA), data = df_thin)
summary(M3a)
##
## Call:
## lm(formula = log(V) ~ log(H) + log(CA), data = df_thin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.21624 -0.39669 0.05547 0.38683 2.16591
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.17619 0.11855 -26.79 <2e-16 ***
## log(H) 0.89842 0.06836 13.14 <2e-16 ***
## log(CA) 0.69976 0.02120 33.00 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6234 on 509 degrees of freedom
## Multiple R-squared: 0.9405, Adjusted R-squared: 0.9402
## F-statistic: 4019 on 2 and 509 DF, p-value: < 2.2e-16
# Raw data: height and crown area plus their interaction.
M1b <- lm(log(V) ~ log(H) * log(CA), data = df)
summary(M1b)
##
## Call:
## lm(formula = log(V) ~ log(H) * log(CA), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.22352 -0.47030 0.02357 0.46002 2.18969
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.84068 0.05556 -51.130 < 2e-16 ***
## log(H) 0.64297 0.03426 18.769 < 2e-16 ***
## log(CA) 0.68754 0.02098 32.776 < 2e-16 ***
## log(H):log(CA) 0.03069 0.01087 2.822 0.00482 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6256 on 1901 degrees of freedom
## Multiple R-squared: 0.8842, Adjusted R-squared: 0.884
## F-statistic: 4839 on 3 and 1901 DF, p-value: < 2.2e-16
# Binned data: height and crown area plus their interaction.
M2b <- lm(log(V) ~ log(H) * log(CA), data = df_bin_y)
summary(M2b)
##
## Call:
## lm(formula = log(V) ~ log(H) * log(CA), data = df_bin_y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32815 -0.22466 -0.06013 0.21588 0.56332
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.63800 1.04419 -2.526 0.026596 *
## log(H) 0.37765 0.66330 0.569 0.579623
## log(CA) 0.87134 0.18560 4.695 0.000519 ***
## log(H):log(CA) 0.03444 0.05763 0.598 0.561246
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.303 on 12 degrees of freedom
## Multiple R-squared: 0.9891, Adjusted R-squared: 0.9864
## F-statistic: 363.6 on 3 and 12 DF, p-value: 4.843e-12
# Thinned data: height and crown area plus their interaction.
M3b <- lm(log(V) ~ log(H) * log(CA), data = df_thin)
summary(M3b)
##
## Call:
## lm(formula = log(V) ~ log(H) * log(CA), data = df_thin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1873 -0.3989 0.0380 0.3997 2.1997
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.15305 0.11942 -26.403 <2e-16 ***
## log(H) 0.85939 0.07312 11.754 <2e-16 ***
## log(CA) 0.66516 0.03141 21.178 <2e-16 ***
## log(H):log(CA) 0.02400 0.01609 1.492 0.136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6226 on 508 degrees of freedom
## Multiple R-squared: 0.9407, Adjusted R-squared: 0.9404
## F-statistic: 2687 on 3 and 508 DF, p-value: < 2.2e-16
# Visual regression diagnostics (performance::check_model) for all 15 models.
# Raw-data models (M1*)
check_model(M1)
check_model(M1_y)
check_model(M1y)
check_model(M1a)
check_model(M1b)
# Binned-data models (M2*)
check_model(M2)
check_model(M2_y)
check_model(M2y)
check_model(M2a)
check_model(M2b)
# Thinned-data models (M3*)
check_model(M3)
check_model(M3_y)
check_model(M3y)
check_model(M3a)
check_model(M3b)
Studentized Breusch-Pagan test for heteroscedasticity.
A p-value < 0.05 indicates that heteroscedasticity is a problem for the model.
# Breusch-Pagan tests for the raw-data (M1*) models.
# Each test is run ONCE and its statistic, df, and p-value are read from the
# returned "htest" object by name; the original re-ran bptest() for every
# component and extracted them by fragile positional index ([[1]], [[2]], [[4]]).
bpM1 <- bptest(M1)
bpM1_y <- bptest(M1_y)
bpM1y <- bptest(M1y)
bpM1a <- bptest(M1a)
bpM1b <- bptest(M1b)
bp1_tests <- list(bpM1, bpM1_y, bpM1y, bpM1a, bpM1b)
stat1 <- vapply(bp1_tests, function(b) unname(b$statistic), numeric(1))
df1 <- vapply(bp1_tests, function(b) unname(b$parameter), numeric(1))
p1 <- vapply(bp1_tests, function(b) unname(b$p.value), numeric(1))
rown <- c("M_CA", "M_CA + year", "M_CA * year", "M + CA", "M * CA")
bp1 <- data.frame(rown, stat1, df1, p1)
colnames(bp1) <- c("model", "BP", "df", "p-value")
kable(bp1)
| model | BP | df | p-value |
|---|---|---|---|
| M_CA | 12.73989 | 1 | 0.0003579 |
| M_CA + year | 53.13540 | 2 | 0.0000000 |
| M_CA * year | 68.69836 | 3 | 0.0000000 |
| M + CA | 18.16733 | 2 | 0.0001135 |
| M * CA | 36.53715 | 3 | 0.0000001 |
# Breusch-Pagan tests for the binned-data (M2*) models.
# Each test is run once; statistic, df, and p-value are taken from the htest
# object by name rather than by positional index.
bpM2 <- bptest(M2)
bpM2_y <- bptest(M2_y)
bpM2y <- bptest(M2y)
bpM2a <- bptest(M2a)
bpM2b <- bptest(M2b)
bp2_tests <- list(bpM2, bpM2_y, bpM2y, bpM2a, bpM2b)
stat2 <- vapply(bp2_tests, function(b) unname(b$statistic), numeric(1))
df2 <- vapply(bp2_tests, function(b) unname(b$parameter), numeric(1))
p2 <- vapply(bp2_tests, function(b) unname(b$p.value), numeric(1))
rown <- c("M_CA", "M_CA + year", "M_CA * year", "M + CA", "M * CA")
bp2 <- data.frame(rown, stat2, df2, p2)
colnames(bp2) <- c("model", "BP", "df", "p-value")
kable(bp2)
| model | BP | df | p-value |
|---|---|---|---|
| M_CA | 0.8231927 | 1 | 0.3642484 |
| M_CA + year | 2.4746043 | 2 | 0.2901660 |
| M_CA * year | 4.2440677 | 3 | 0.2362867 |
| M + CA | 1.0411573 | 2 | 0.5941766 |
| M * CA | 4.6880527 | 3 | 0.1961173 |
# Breusch-Pagan tests for the thinned-data (M3*) models.
# Each test is run once; statistic, df, and p-value are taken from the htest
# object by name rather than by positional index.
bpM3 <- bptest(M3)
bpM3_y <- bptest(M3_y)
bpM3y <- bptest(M3y)
bpM3a <- bptest(M3a)
bpM3b <- bptest(M3b)
bp3_tests <- list(bpM3, bpM3_y, bpM3y, bpM3a, bpM3b)
stat3 <- vapply(bp3_tests, function(b) unname(b$statistic), numeric(1))
df3 <- vapply(bp3_tests, function(b) unname(b$parameter), numeric(1))
p3 <- vapply(bp3_tests, function(b) unname(b$p.value), numeric(1))
rown <- c("M_CA", "M_CA + year", "M_CA * year", "M + CA", "M * CA")
bp3 <- data.frame(rown, stat3, df3, p3)
colnames(bp3) <- c("model", "BP", "df", "p-value")
kable(bp3)
| model | BP | df | p-value |
|---|---|---|---|
| M_CA | 13.30107 | 1 | 0.0002653 |
| M_CA + year | 20.94477 | 2 | 0.0000283 |
| M_CA * year | 22.13246 | 3 | 0.0000612 |
| M + CA | 8.60780 | 2 | 0.0135157 |
| M * CA | 22.30937 | 3 | 0.0000562 |
Models:
M = H_CA
M_y = H_CA + year
My = H_CA * year
Ma = H + CA
Mb = H * CA
# AICc ranking of the five raw-data models (dAICc = 0 marks the best model).
AICctab(M1, M1_y, M1y, M1a, M1b, base = TRUE)
## AICc dAICc df
## M1y 3307.7 0.0 5
## M1_y 3360.5 52.7 4
## M1b 3625.0 317.2 5
## M1a 3631.0 323.2 4
## M1 3631.4 323.6 3
# AICc ranking of the five binned-data models.
AICctab(M2, M2_y, M2y, M2a, M2b, base = TRUE)
## AICc dAICc df
## M2y 5.0 0.0 5
## M2_y 7.4 2.4 4
## M2 11.4 6.4 3
## M2a 14.7 9.7 4
## M2b 18.6 13.6 5
# AICc ranking of the five thinned-data models.
AICctab(M3, M3_y, M3y, M3a, M3b, base = TRUE)
## AICc dAICc df
## M3y 925.9 0.0 5
## M3_y 929.0 3.1 4
## M3b 973.9 48.0 5
## M3a 974.1 48.2 4
## M3 977.2 51.3 3
M1 = raw data
M2 = binned data
M3 = thinned data
M = H_CA
M_y = H_CA + year
My = H_CA * year
Ma = H + CA
Mb = H * CA
# ---- Model predictions with approximate 95% confidence intervals ----
# All 15 models follow the identical recipe, so it is factored into a helper:
# bind fitted values and their standard errors to the prediction data, then
# add Wald-type bounds (fit +/- critval * SE).
# The global pred*/fit*/se* objects are kept because later plotting code
# references the fit* vectors directly.
ci_frame <- function(data, pred, critval = 1.96) {
  out <- data.frame(data, fit = pred$fit, se = pred$se.fit)
  out$upper <- out$fit + (critval * out$se)
  out$lower <- out$fit - (critval * out$se)
  out
}
critval <- 1.96 ## approx 95% CI (global retained for downstream code)

# H_CA models: raw (M1), binned (M2), thinned (M3)
pred1 <- predict(M1, newdata = df, se.fit = TRUE)
fit1 <- pred1$fit
se1 <- pred1$se.fit
preddata1 <- ci_frame(df, pred1)

pred2 <- predict(M2, newdata = df_bin_y, se.fit = TRUE)
fit2 <- pred2$fit
se2 <- pred2$se.fit
preddata2 <- ci_frame(df_bin_y, pred2)

pred3 <- predict(M3, newdata = df_thin, se.fit = TRUE)
fit3 <- pred3$fit
se3 <- pred3$se.fit
preddata3 <- ci_frame(df_thin, pred3)

# H_CA + year models
pred1_y <- predict(M1_y, newdata = df, se.fit = TRUE)
fit1_y <- pred1_y$fit
se1_y <- pred1_y$se.fit
preddata1_y <- ci_frame(df, pred1_y)

pred2_y <- predict(M2_y, newdata = df_bin_y, se.fit = TRUE)
fit2_y <- pred2_y$fit
se2_y <- pred2_y$se.fit
preddata2_y <- ci_frame(df_bin_y, pred2_y)

pred3_y <- predict(M3_y, newdata = df_thin, se.fit = TRUE)
fit3_y <- pred3_y$fit
se3_y <- pred3_y$se.fit
preddata3_y <- ci_frame(df_thin, pred3_y)

# H_CA * year models
pred1y <- predict(M1y, newdata = df, se.fit = TRUE)
fit1y <- pred1y$fit
se1y <- pred1y$se.fit
# Fix: the original assembled preddata1y from fit1_y (M1_y's fitted values),
# a copy-paste slip; preddata1y now uses M1y's own predictions.
preddata1y <- ci_frame(df, pred1y)

pred2y <- predict(M2y, newdata = df_bin_y, se.fit = TRUE)
fit2y <- pred2y$fit
se2y <- pred2y$se.fit
preddata2y <- ci_frame(df_bin_y, pred2y)

pred3y <- predict(M3y, newdata = df_thin, se.fit = TRUE)
fit3y <- pred3y$fit
se3y <- pred3y$se.fit
preddata3y <- ci_frame(df_thin, pred3y)

# H + CA models
pred1a <- predict(M1a, newdata = df, se.fit = TRUE)
fit1a <- pred1a$fit
se1a <- pred1a$se.fit
preddata1a <- ci_frame(df, pred1a)

pred2a <- predict(M2a, newdata = df_bin_y, se.fit = TRUE)
fit2a <- pred2a$fit
se2a <- pred2a$se.fit
preddata2a <- ci_frame(df_bin_y, pred2a)

pred3a <- predict(M3a, newdata = df_thin, se.fit = TRUE)
fit3a <- pred3a$fit
se3a <- pred3a$se.fit
preddata3a <- ci_frame(df_thin, pred3a)

# H * CA models
pred1b <- predict(M1b, newdata = df, se.fit = TRUE)
fit1b <- pred1b$fit
se1b <- pred1b$se.fit
preddata1b <- ci_frame(df, pred1b)

pred2b <- predict(M2b, newdata = df_bin_y, se.fit = TRUE)
fit2b <- pred2b$fit
se2b <- pred2b$se.fit
preddata2b <- ci_frame(df_bin_y, pred2b)

pred3b <- predict(M3b, newdata = df_thin, se.fit = TRUE)
fit3b <- pred3b$fit
se3b <- pred3b$se.fit
preddata3b <- ci_frame(df_thin, pred3b)
# ---- Observed vs. predicted plots ----
# log(observed V) against fitted values for each model, with a smoother and a
# 1:1 reference line; points along the 1:1 line indicate unbiased predictions.
# Raw-data panels use alpha = 0.2 to reduce overplotting.
# NOTE(review): the y aesthetic references the global fit* vectors created in
# the prediction section, not the "fit" column of each preddata* frame —
# confirm both stay in sync if the prediction code changes.
# H_CA models (raw / binned / thinned)
ggplot(preddata1, aes(x = log(V), y = fit1)) + geom_point(alpha=0.2) +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(preddata2, aes(x = log(V), y = fit2)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(preddata3, aes(x = log(V), y = fit3)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# H_CA + year models
ggplot(preddata1_y, aes(x = log(V), y = fit1_y)) + geom_point(alpha=0.2) +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(preddata2_y, aes(x = log(V), y = fit2_y)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(preddata3_y, aes(x = log(V), y = fit3_y)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# H_CA * year models
ggplot(preddata1y, aes(x = log(V), y = fit1y)) + geom_point(alpha=0.2) +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(preddata2y, aes(x = log(V), y = fit2y)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(preddata3y, aes(x = log(V), y = fit3y)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# H + CA models
ggplot(preddata1a, aes(x = log(V), y = fit1a)) + geom_point(alpha=0.2) +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(preddata2a, aes(x = log(V), y = fit2a)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(preddata3a, aes(x = log(V), y = fit3a)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# H * CA models
ggplot(preddata1b, aes(x = log(V), y = fit1b)) + geom_point(alpha=0.2) +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(preddata2b, aes(x = log(V), y = fit2b)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(preddata3b, aes(x = log(V), y = fit3b)) + geom_point() +
geom_smooth() +
theme_bw()+
theme(aspect.ratio = 1) +
geom_abline(slope = 1, intercept = 0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Jucker, Tommaso, et al. “Allometric equations for integrating remote sensing imagery into forest monitoring programmes.” Global change biology 23.1 (2017): 177-190. https://doi.org/10.1111/gcb.13388↩︎